# Attach these packages so their functions don't need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
library(magrittr) # enables piping : %>%
# library("msm") # multistate modeling (cannot be declared silently)
# # Verify these packages are available on the machine, but their functions need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
requireNamespace("ggplot2") # graphing
requireNamespace("tidyr") # data manipulation
# requireNamespace("dplyr") # Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr).
library("dplyr") # Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr).
# requireNamespace("testit")# For asserting conditions meet expected patterns.
# # requireNamespace("car") # For it's `recode()` function.
# requireNamespace("flexsurv") # parameteric survival and multi-state
# requireNamespace("mstate") # multistate modeling
# requireNamespace("foreign") # data input
# Call `base::source()` on any repo file that defines functions needed below. Ideally, no real operations are performed.
source("./scripts/common-functions.R") # used in multiple reports
source("./scripts/graphing/graph-presets.R") # fonts, colors, themes
path_input <- "./data-unshared/derived/0-dto.rds"
#Put code in here. It doesn't call a chunk in the codebehind file.
dt0 <- readRDS(path_input)
ds0 <- dt0
ds0 %>% glimpse(60)
Observations: 9,076
Variables: 53
$ StudentID <dbl> 101101125, 101101189, 101101192, 10110…
$ ATeacherID <chr> "101101", "101101", "101101", "101101"…
$ BTeacherID <chr> "101101", "101101", "101101", "101101"…
$ CTeacherID <chr> "101204", " ", "101203", " "…
$ DTeacherID <chr> "101204", " ", "101203", " "…
$ SchoolID <fct> Cascade Elementary in WA , Cascade Ele…
$ Tx1 <fct> Intervention Group (1), Intervention G…
$ Tx2 <fct> Intervention Group (1), NA, Interventi…
$ ASDQ1 <fct> Certainly True, Somewhat True, Certain…
$ ASDQ2 <fct> Not True, Not True, Not True, Not True…
$ ASDQ3 <fct> Not True, Not True, Not True, Not True…
$ ASDQ4 <fct> Certainly True, Somewhat True, Certain…
$ ASDQ5 <fct> Not True, Not True, Not True, Certainl…
$ ASDQ6 <fct> Not True, Not True, Not True, Not True…
$ ASDQ7 <fct> Certainly True, Certainly True, Certai…
$ ASDQ8 <fct> Not True, Not True, Not True, Not True…
$ ASDQ9 <fct> Certainly True, Somewhat True, Certain…
$ ASDQ10 <fct> Not True, Not True, Not True, Not True…
$ ASDQ11 <fct> Certainly True, Certainly True, Certai…
$ ASDQ12 <fct> Not True, Not True, Not True, Not True…
$ ASDQ13 <fct> Not True, Not True, Not True, Not True…
$ ASDQ14 <fct> Certainly True, Certainly True, Certai…
$ ASDQ15 <fct> Not True, Not True, Not True, Not True…
$ ASDQ16 <fct> Not True, Not True, Not True, Not True…
$ ASDQ17 <fct> Certainly True, Somewhat True, Certain…
$ ASDQ18 <fct> Not True, Not True, Not True, Not True…
$ ASDQ19 <fct> Not True, Not True, Not True, Not True…
$ ASDQ20 <fct> Somewhat True, Somewhat True, Somewhat…
$ ASDQ21 <fct> Certainly True, Somewhat True, Certain…
$ ASDQ22 <fct> Not True, Not True, Not True, Not True…
$ ASDQ23 <fct> Not True, Not True, Not True, Not True…
$ ASDQ24 <fct> Not True, Not True, Not True, Not True…
$ ASDQ25 <fct> Somewhat True, Certainly True, Certain…
$ SDQpro1 <dbl> 1.8, 1.0, 1.8, 0.2, 1.0, 1.0, 0.2, 0.2…
$ SDQpro2 <dbl> 1.6, 1.4, 2.0, 1.0, 0.6, 2.0, 0.2, 1.2…
$ SDQhyper1 <dbl> 0.2, 0.2, 0.0, 0.2, 0.2, 0.2, 0.8, 0.2…
$ SDQhyper2 <dbl> 0.0, 0.6, 0.0, 0.2, 0.6, 0.0, 1.2, 0.6…
$ SDQemot1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SDQemot2 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SDQcon1 <dbl> 0.0, 0.0, 0.0, 0.6, 0.2, 0.0, 0.4, 0.4…
$ SDQcon2 <dbl> 0.0, 0.2, 0.0, 0.0, 0.8, 0.4, 0.4, 0.2…
$ SDQpeer1 <dbl> 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.4, 0.2…
$ SDQpeer2 <dbl> 0.0, 0.0, 0.2, 0.4, 0.2, 0.0, 0.2, 0.2…
$ SDQpro3 <dbl> 2.0, NA, 1.2, NA, 0.4, NA, 0.4, 1.4, N…
$ SDQpro4 <dbl> 2.0, NA, 1.6, NA, 1.0, NA, 1.0, 1.4, N…
$ SDQhyper3 <dbl> 1.2, NA, 0.0, NA, 0.8, NA, 1.4, 0.4, N…
$ SDQhyper4 <dbl> 0.8, NA, 0.0, NA, 1.0, NA, 1.2, 1.4, N…
$ SDQemot3 <dbl> 0.0, NA, 0.6, NA, 0.0, NA, 0.0, 0.0, N…
$ SDQemot4 <dbl> 0.2, NA, 1.4, NA, 0.0, NA, 0.4, 0.4, N…
$ SDQcon3 <dbl> 0.2, NA, 0.0, NA, 0.4, NA, 1.0, 0.8, N…
$ SDQcon4 <dbl> 0.0, NA, 0.0, NA, 0.4, NA, 0.2, 1.0, N…
$ SDQpeer3 <dbl> 0.2, NA, 0.0, NA, 0.4, NA, 0.6, 0.8, N…
$ SDQpeer4 <dbl> 0.2, NA, 0.2, NA, 0.2, NA, 0.0, 1.4, N…
ds0 <- ds0 %>%
dplyr::mutate(
school_id = as.character(SchoolID)
) %>%
dplyr::filter(!SchoolID == "Mark Twain Elementary in WA_duplicated_140") %>%
dplyr::mutate(
school_id = ifelse(school_id == "Benson Hill Elementary in WA'",
"Benson Hill Elementary in WA", school_id)
)
regex = '(.+)( in )([A-Z]{2})( ?)'
ds0 <- ds0 %>%
dplyr::mutate(
school_id = gsub(" Elementary ", " ", school_id)
,school_name = gsub(regex, "\\1", school_id)
,school_state = gsub(regex, "\\3", school_id)
)
unique(ds0$SchoolID)
[1] Cascade Elementary in WA Bryn Mawr Elementary in WA
[3] Tiffany Park Elementary in WA Sierra Heights Elementary in WA
[5] Kennydale Elementary in WA Nautilus K-8 in WA
[7] Twin Lakes Elementary in WA Lakeland Elementary in WA
[9] Woodmont K-8 in WA Silver Lake Elementary in WA
[11] Mark Twain Elementary in WA Olympic View Elementary in WA
[13] Suquamish Elementary in WA Hilder Pearson Elementary in WA
[15] Poulsbo Elementary in WA Breidablik Elementary in WA
[17] Vinland Elementary in WA Redmond Elementary in WA
[19] Juanita Elementary in WA Rose Hill Elementary in WA
[21] Elizabeth Blackwell Elementary in WA Olivia Park Elementary in WA
[23] Campbell Hill Elementary in WA Talbot Hill Elementary in WA
[25] Highlands Elementary in WA Benson Hill Elementary in WA'
[27] Maplewood Heights Elementary in WA Honey Dew Elementary in WA
[29] Richard Gordon Elementary in WA David Wolfle Elementary in WA
[31] Green Gables Elementary in WA Sherwood Forest Elementary in WA
[33] Brigadoon Elementary in WA Rainier View Elementary in WA
[35] Mirror Lake Elementary in WA Wildwood Elementary in WA
[37] Alexander Graham Bell Elementary in WA Einstein Elementary in WA
[39] Rachel Carson Elementary in WA Fairmount Elementary in WA
[41] Highland Elementary in AZ Johnson Elementary in AZ
[43] Irving Elementary in AZ Taft Elementary in AZ
[45] Edison Elementary in AZ Robson Elementary in AZ
[47] Webster Elementary in AZ Lindbergh Elementary in AZ
[49] Adams Elementary in AZ Lincoln Elementary in AZ
[51] Mendoza Elementary in AZ Entz Elementary in AZ
[53] Hawthorne Elementary in AZ Wilson Elementary in AZ
[55] Porter Elementary in AZ Salk Elementary in AZ
[57] Kerr Elementary in AZ Redbird Elementary in AZ
[59] Eisenhower Elementary in AZ Longfellow Elementary in AZ
61 Levels: Cascade Elementary in WA Bryn Mawr Elementary in WA ... Longfellow Elementary in AZ
t1 <- ds0 %>%
dplyr::group_by(SchoolID) %>%
dplyr::summarize(
n_students = length(unique(StudentID))
)
t1 %>% neat()
| SchoolID | n_students |
|---|---|
| Cascade Elementary in WA | 142 |
| Bryn Mawr Elementary in WA | 174 |
| Tiffany Park Elementary in WA | 135 |
| Sierra Heights Elementary in WA | 153 |
| Kennydale Elementary in WA | 136 |
| Nautilus K-8 in WA | 134 |
| Twin Lakes Elementary in WA | 152 |
| Lakeland Elementary in WA | 149 |
| Woodmont K-8 in WA | 149 |
| Silver Lake Elementary in WA | 156 |
| Mark Twain Elementary in WA | 177 |
| Olympic View Elementary in WA | 156 |
| Suquamish Elementary in WA | 150 |
| Hilder Pearson Elementary in WA | 130 |
| Breidablik Elementary in WA | 126 |
| Vinland Elementary in WA | 137 |
| Redmond Elementary in WA | 146 |
| Juanita Elementary in WA | 138 |
| Rose Hill Elementary in WA | 135 |
| Elizabeth Blackwell Elementary in WA | 127 |
| Olivia Park Elementary in WA | 140 |
| Campbell Hill Elementary in WA | 140 |
| Highlands Elementary in WA | 140 |
| Benson Hill Elementary in WA’ | 145 |
| Talbot Hill Elementary in WA | 147 |
| Maplewood Heights Elementary in WA | 150 |
| Honey Dew Elementary in WA | 144 |
| Richard Gordon Elementary in WA | 169 |
| Poulsbo Elementary in WA | 131 |
| David Wolfle Elementary in WA | 138 |
| Green Gables Elementary in WA | 134 |
| Sherwood Forest Elementary in WA | 129 |
| Brigadoon Elementary in WA | 147 |
| Rainier View Elementary in WA | 137 |
| Mirror Lake Elementary in WA | 146 |
| Wildwood Elementary in WA | 160 |
| Alexander Graham Bell Elementary in WA | 136 |
| Einstein Elementary in WA | 162 |
| Rachel Carson Elementary in WA | 118 |
| Fairmount Elementary in WA | 147 |
| Highland Elementary in AZ | 173 |
| Johnson Elementary in AZ | 155 |
| Irving Elementary in AZ | 141 |
| Taft Elementary in AZ | 144 |
| Edison Elementary in AZ | 171 |
| Robson Elementary in AZ | 145 |
| Webster Elementary in AZ | 160 |
| Lindbergh Elementary in AZ | 140 |
| Adams Elementary in AZ | 170 |
| Lincoln Elementary in AZ | 161 |
| Mendoza Elementary in AZ | 151 |
| Entz Elementary in AZ | 171 |
| Wilson Elementary in AZ | 154 |
| Porter Elementary in AZ | 128 |
| Salk Elementary in AZ | 163 |
| Kerr Elementary in AZ | 167 |
| Hawthorne Elementary in AZ | 189 |
| Redbird Elementary in AZ | 148 |
| Eisenhower Elementary in AZ | 164 |
| Longfellow Elementary in AZ | 151 |
# How many students in each school
g1 <- ds0 %>%
dplyr::group_by(SchoolID) %>%
dplyr::summarize(
n_students = length(unique(StudentID))
)
g1 %>%
TabularManifest::histogram_continuous("n_students")
# Scores of a measure in each school
g2 <- ds0 %>%
ggplot2::ggplot(
aes(
x = SDQpro1
,fill = school_state
)
) +
geom_bar() +
facet_wrap("school_name") +
theme_minimal()
g2
Warning: Removed 2603 rows containing non-finite values (stat_count).
# denisty of a measure in across schools
g3 <- ds0 %>%
ggplot2::ggplot(
aes(
x = SDQpro1
,fill = school_state
)
) +
geom_density() +
facet_wrap("school_name") +
theme_minimal()
g3
Warning: Removed 2603 rows containing non-finite values (stat_density).
# box plot of a variable and the treatment variable
g4 <- ds0 %>%
ggplot2::ggplot(
aes(
x = Tx1
,y = SDQpro1
,fill = Tx1
)
) +
geom_boxplot() +
coord_flip() +
theme_minimal()
g4
Warning: Removed 2603 rows containing non-finite values (stat_boxplot).
# Freqpoly
g5 <- ds0 %>%
ggplot2::ggplot(
mapping = aes(
x = SDQpro1
)
) +
geom_freqpoly(
mapping = aes(
color = Tx1
) , bindwidth = 100
) +
theme_minimal()
Warning: Ignoring unknown parameters: bindwidth
g5
Warning: Removed 2603 rows containing non-finite values (stat_bin).
g6 <- ds0 %>%
ggplot2::ggplot(
mapping = aes(
x = SDQpro1
,y = SDQpro2
)
) +
ggplot2::geom_point()+
ggplot2::geom_smooth()+
ggplot2::geom_jitter()+
theme_minimal()
g6
Warning: Removed 3617 rows containing non-finite values (stat_smooth).
Warning: Removed 3617 rows containing missing values (geom_point).
Warning: Removed 3617 rows containing missing values (geom_point).
g6_2 <- ds0 %>%
subset(!is.na(Tx1)) %>%
ggplot2::ggplot(
aes(
x = school_name
,y = SDQpro1
)
) +
geom_point(
alpha = 0.3
,color = "tomato"
,position = "jitter") +
geom_boxplot(alpha=0) +
coord_flip() +
facet_grid(. ~ Tx1) +
theme_minimal()
g6_2
Warning: Removed 957 rows containing non-finite values (stat_boxplot).
Warning: Removed 957 rows containing missing values (geom_point).
g7 <- ds0 %>%
ggplot2::ggplot() +
ggplot2::geom_smooth(
mapping = aes(
x = SDQpro1
,y = SDQpro2
,color = Tx1
)
) +
theme_minimal()
g7
Warning: Removed 3617 rows containing non-finite values (stat_smooth).
g8 <- ds0 %>%
ggplot2::ggplot() +
ggplot2::geom_smooth(
mapping = aes(
x = SDQpro1
,y = SDQpro2
,color = Tx1
)
) +
facet_wrap("school_name") +
theme_minimal()
g8
Warning: Removed 3617 rows containing non-finite values (stat_smooth).
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, : pseudoinverse used at 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, : neighborhood radius 0.4
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, : reciprocal condition number 4.9396e-17
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at 1
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 0.4
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition number 4.9396e-17
vars <- c("StudentID","school_name", "school_state", "Tx1")
d9_long <- ds0 %>%
dplyr::select_(.dots = c(vars, paste0("SDQpro",1:4))) %>%
tidyr::gather(
key = "key" # name of new variable to store the key
,value = "value" # name of new variable to store the values
,SDQpro1, SDQpro2, SDQpro3, SDQpro4 # selection of columns to gather
) %>%
dplyr::mutate(
prosocial = value, # the variable that will store the value of SDQ
wave = gsub(pattern = "SDQpro(\\d+)",replacement = "\\1", x = key)
) %>%
dplyr::select(-key, -value) %>%
dplyr::arrange(StudentID)
Warning: select_() is deprecated.
Please use select() instead
The 'programming' vignette or the tidyeval book can help you
to program with select() : https://tidyeval.tidyverse.org
This warning is displayed once per session.
d9_long %>% print()
# A tibble: 35,632 x 6
StudentID school_name school_state Tx1 prosocial wave
<dbl> <chr> <chr> <fct> <dbl> <chr>
1 101101125 Cascade WA Intervention Group (1) 1.8 1
2 101101125 Cascade WA Intervention Group (1) 1.6 2
3 101101125 Cascade WA Intervention Group (1) 2 3
4 101101125 Cascade WA Intervention Group (1) 2 4
5 101101189 Cascade WA Intervention Group (1) 1 1
6 101101189 Cascade WA Intervention Group (1) 1.4 2
7 101101189 Cascade WA Intervention Group (1) NA 3
8 101101189 Cascade WA Intervention Group (1) NA 4
9 101101192 Cascade WA Intervention Group (1) 1.8 1
10 101101192 Cascade WA Intervention Group (1) 2 2
# … with 35,622 more rows
p <- d9_long %>%
ggplot2::ggplot(
mapping = aes(
x = wave
,y = prosocial
,group = StudentID
,color = Tx1
)
)
p + geom_point() + geom_jitter()
Warning: Removed 12462 rows containing missing values (geom_point).
Warning: Removed 12462 rows containing missing values (geom_point).
p + geom_line()
Warning: Removed 11898 rows containing missing values (geom_path).
p + geom_line() + facet_grid(. ~ Tx1)
Warning: Removed 11898 rows containing missing values (geom_path).
p + geom_line() + facet_wrap(~school_name)
Warning: Removed 11898 rows containing missing values (geom_path).
d9_long_2 <- d9_long %>%
group_by(school_name, wave) %>%
mutate(
sm_prosocial = mean(x = prosocial, na.rm = TRUE)
)
d9_long_2 %>% print()
# A tibble: 35,632 x 7
# Groups: school_name, wave [240]
StudentID school_name school_state Tx1 prosocial wave sm_prosocial
<dbl> <chr> <chr> <fct> <dbl> <chr> <dbl>
1 101101125 Cascade WA Intervention Group (1) 1.8 1 1.22
2 101101125 Cascade WA Intervention Group (1) 1.6 2 1.43
3 101101125 Cascade WA Intervention Group (1) 2 3 1.44
4 101101125 Cascade WA Intervention Group (1) 2 4 1.47
5 101101189 Cascade WA Intervention Group (1) 1 1 1.22
6 101101189 Cascade WA Intervention Group (1) 1.4 2 1.43
7 101101189 Cascade WA Intervention Group (1) NA 3 1.44
8 101101189 Cascade WA Intervention Group (1) NA 4 1.47
9 101101192 Cascade WA Intervention Group (1) 1.8 1 1.22
10 101101192 Cascade WA Intervention Group (1) 2 2 1.43
# … with 35,622 more rows
s <- d9_long_2 %>%
ggplot2::ggplot(
mapping = aes(
x = wave
,y = sm_prosocial
,group = school_name
,color = school_state
)
)
s + ggplot2::geom_point(
#shape = 19 # Use solid circles
shape = 1 # Use hollow circles
,alpha = 1/4 # 1/4 opacity
) +
ggplot2::geom_jitter()
s + ggplot2::geom_line()
s + ggplot2::geom_line() + facet_wrap(~school_name)
# similar to the geom_line above
# s +
# ggplot2::geom_smooth() +
# facet_wrap("school_name") +
# theme_minimal()
g15 <- d9_long %>%
group_by(school_name, wave) %>%
mutate(
sm_prosocial = mean(x = prosocial, na.rm = TRUE)
) %>%
subset(!is.na(Tx1)) %>%
ggplot2::ggplot(
aes(
x = sm_prosocial
,fill = Tx1
)
) +
geom_density() +
facet_wrap("wave") +
theme_minimal()
g15